# -*- coding: utf-8 -*-
import scrapy
from scrapy.http.response.html import HtmlResponse
# Responses can be parsed with either XPath or CSS selectors.
from ..items import QsbkItem
class QsbkCrawerSpider(scrapy.Spider):
    """Spider that walks the paginated text listing on qiushibaike.com.

    Starting from the first listing page, every entry found under the
    ``content-left`` container is emitted as a ``QsbkItem`` carrying its
    ``author`` and ``content`` text, then the pagination link is followed
    until no further link is found.
    """

    name = 'qsbk_crawer'
    allowed_domains = ['qiushibaike.com']
    start_urls = ['https://www.qiushibaike.com/text/page/1/']
    # No longer needed to build pagination URLs (they are resolved against
    # the response URL), but kept so external code reading it keeps working.
    base_domain = 'https://www.qiushibaike.com'

    def parse(self, response: HtmlResponse):
        """Extract the entries of one listing page and follow pagination.

        Args:
            response: The downloaded listing page.

        Yields:
            One ``QsbkItem`` per entry, followed by a ``scrapy.Request`` for
            the next page when the pagination block provides a link.
        """
        entries = response.xpath("//div[@id='content-left']/div")

        for entry in entries:
            # ``get()`` returns None when the entry has no <h2> text node;
            # calling ``.strip()`` on it directly would abort the whole
            # callback and lose the rest of the page.
            author = entry.xpath(".//h2/text()").get()
            fragments = entry.xpath(".//div[@class='content']//text()").getall()
            content = "".join(fragments).strip()

            if author is None and not content:
                # Neither an author nor any text: nothing worth emitting.
                continue

            yield QsbkItem(author=(author or "").strip(), content=content)

        next_url = response.xpath(
            "//ul[@class='pagination']/li[last()]/a/@href"
        ).get()
        if next_url:
            # urljoin resolves relative and absolute hrefs alike, unlike
            # plain string concatenation with a fixed domain prefix.
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)


